# add census tracts, neighborhood, and coordinates to parcels
# -------------------------------------------------------------------- #
# prelims ####
# -------------------------------------------------------------------- #
# clear environment
rm(list = ls())

# libraries
library(haven)
library(rgdal)
library(raster)
library(readxl) 
library(classInt)
library(Hmisc) 
library(GISTools) 
library(tidyverse)
library(units)
library(sf) 

# paths
  # main directory
dir <- paste0("PATH_TO_MAIN_DIRECTORY_HERE")
setwd(dir)
  # barcelona data extract
city <- "08_900" 
date <- "2020-04-24"
gis_path <- paste0(dir, "orig/catastro/", city, "_UH_", date, "_SHF/PARCELA/")
SecCenPath <- paste0(dir,"orig/aj_barcelona/0301040100_BCN_UNITATS_ADM/0301040100_SecCens_ADM_ETRS89.shp")

# -------------------------------------------------------------------- #
# prepare data ####
# -------------------------------------------------------------------- #

# shape file
PlotShp <- st_read(stringsAsFactors = FALSE,
                   dsn = path.expand(sprintf("%s", gis_path, "PARCELA"))
                   , query="SELECT REFCAT FROM \"PARCELA\" WHERE FECHABAJA >= 30000000 AND PARCELA != '09000'") %>%
  st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs') %>%
  distinct(REFCAT, .keep_all = TRUE) 

# -------------------------------------------------------------------- #
# get coordinates ####
# -------------------------------------------------------------------- #

# transform to lat/lon
PlotShp <- PlotShp %>% st_transform(crs = '+proj=longlat +datum=WGS84')

# centroid
PlotShp$cent <- st_centroid(PlotShp$geometry,TRUE)

# coordinates
Coord <- st_coordinates(PlotShp$cent)
PlotShp <- cbind(PlotShp, Coord)
names(PlotShp)[names(PlotShp)=="X"] <- "Lon"
names(PlotShp)[names(PlotShp)=="Y"] <- "Lat"

# no geo
PlotShp$geometry <- NULL

PlotShp <- PlotShp %>% distinct(REFCAT, .keep_all = TRUE)
names(PlotShp)[names(PlotShp)=="REFCAT"] <- "PlotCode"

PlotShp <- PlotShp %>% st_as_sf() %>%
  st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')

# -------------------------------------------------------------------- #
# merge with census ####
# -------------------------------------------------------------------- #

firstyear <- 2019
lastyear <- 2002
listyears <- c(firstyear:lastyear)

for(year in listyears){

  # specific year path
  if(year>2011){ 
    cen_path <- paste0(dir,"orig/ine/Cartografia Secciones Censales/seccionado_", year, "/")
    censusfile <- paste0("SECC_CE_", year, "0101")
    varselect <- c("CSEC","CDIS","CMUN","CPRO","CCA","CUSEC")
  }else if (year==2011) { 
    cen_path <- paste0(dir, "orig/ine/cartografia_censo", year,"_nacional/")
    censusfile <- "SECC_CPV_E_20111101_01_R_INE"
    varselect <- c("CSEC","CDIS","CMUN","CPRO","CCA","CUSEC")
  }else if (year>=2002 & year <=2010){ 
    censusfile <- paste0("bseccenv10sh1f1_", year,"0101_0")
    cen_path <- paste0(dir, "orig/idescat/Seccions Censals 2002-2016/", censusfile)
    varselect <- c("MUNICIPI","DISTRICTE","SECCIO","geometry")
  }
    
  # retrieve census spatial data
  CensShp <- st_read(dsn = path.expand(sprintf("%s", cen_path, censusfile)))  
  CensShp <- CensShp[c(varselect)] %>%
    st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')

  # calculate area
  CensShp$AreaSqm <- st_area(CensShp) %>% as.integer()

  if(year>=2002 & year <=2010){

    CensShp$CPRO <- substr(CensShp$MUNICIPI,start = 1,stop = 2)
    CensShp$CMUN <- substr(CensShp$MUNICIPI,start = 3,stop = 5)
    CensShp$CCA <- "09"
    names(CensShp)[names(CensShp)=="DISTRICTE"] <- "CDIS"
    names(CensShp)[names(CensShp)=="SECCIO"] <- "CSEC"
    CensShp$CUSEC <- paste0(CensShp$CPRO,CensShp$CMUN,CensShp$CDIS,CensShp$CSEC)
    CensShp[!names(CensShp) %in% c("MUNICIPI")]
    CensShp <- CensShp %>% select(-c("MUNICIPI"))
    
  }
  
  # Intersect catastro with census data
  SecData <- st_intersection(PlotShp, CensShp)
  SecData <- SecData %>% distinct(PlotCode, .keep_all = TRUE)

  # add year
  SecData$year <- year
  
  # store R data
  filename_R <- paste0(dir,"data/temp/catastro_census_", city,"_", year, ".RData")
  save(SecData, file = filename_R)
  
  # remove files to save space
  rm(list=c("SecData","CensShp"))
  
} 

# -------------------------------------------------------------------- #
# append years ####
# -------------------------------------------------------------------- #

SecData <- NULL
SecDataYears <- NULL

# census tracts change over time
for(year in listyears){ 
  
  filename_R <- paste0(dir, "data/temp/catastro_census_", city,"_", year, ".RData")
  load(filename_R)
  
  SecData$cent <- NULL

  if(year == firstyear){ 
    SecDataYears <- SecData
  }else{ 
    SecDataYears <- rbind(SecDataYears, SecData)
  }

  save(SecDataYears, file = paste0(dir, "data/int/catastro_census_", city, ".RData"))
  
  # store in stata
  SecDataYears$CCA <- as.character(SecDataYears$CCA)
  SecDataYears$CPRO <- as.character(SecDataYears$CPRO)
  SecDataYears$CMUN <- as.character(SecDataYears$CMUN)
  SecDataYears$CDIS <- as.character(SecDataYears$CDIS)
  SecDataYears$CSEC <- as.character(SecDataYears$CSEC)
  SecDataYears$CUSEC <- as.character(SecDataYears$CUSEC)

} 

# -------------------------------------------------------------------- #
# add barri ####
# -------------------------------------------------------------------- #

# cartography of Barcelona Districts and Census Tracts
varscensus <- c('BARRI','AEB','ZUA','LITERAL')
GisDataSecCen <- st_read(dsn = SecCenPath) %>%
  select(c(all_of(varscensus))) %>%
  st_transform('+proj=utm +zone=31 +ellps=GRS80 +units=m +no_defs')

# interesect
Y <- st_intersection(PlotShp, GisDataSecCen) 
Y$cent <- NULL

# add census tract info
SecDataYears_NMatch <- merge(x = SecDataYears, y = Y[c("PlotCode","BARRI","AEB","ZUA","LITERAL")], by = c("PlotCode")) %>%
  distinct(PlotCode, year, .keep_all = TRUE) 

# store R
filename_R <- paste0(dir, "data/int/catastro_census_neigh_", city, ".RData")
save(SecDataYears_NMatch, file = filename_R)

# store stata
SecDataYears_NMatch$CCA <- as.character(SecDataYears_NMatch$CCA)
SecDataYears_NMatch$CPRO <- as.character(SecDataYears_NMatch$CPRO)
SecDataYears_NMatch$CMUN <- as.character(SecDataYears_NMatch$CMUN)
SecDataYears_NMatch$CDIS <- as.character(SecDataYears_NMatch$CDIS)
SecDataYears_NMatch$CSEC <- as.character(SecDataYears_NMatch$CSEC)
SecDataYears_NMatch$CUSEC <- as.character(SecDataYears_NMatch$CUSEC)

filename_dta <- paste0(dir, "data/int/catastro_census_neigh_", city, ".dta")
write_dta(data = SecDataYears_NMatch, filename_dta)

# erase temp files
for(year in listyears){
  file.remove(paste0(dir, "data/temp/catastro_census_08_900_", year, ".RData"))
}

# -------------------------------------------------------------------- #
# closing ####
# -------------------------------------------------------------------- #
rm(list = ls())
